C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\ipykernel\__main__.py:14: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\ipykernel\__main__.py:15: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\ipykernel\__main__.py:16: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-2-fb4819ba599b> in <module>()
----> 1 get_ipython().run_cell_magic(u'time', u'', u'%%time\ntrain = pd.read_csv("../data/train_2013.csv", index_col=0)\ntrain = train.reset_index(drop=True)\nnp.random.seed(402)\ntrain = train.ix[np.random.choice(train.index, 50000)]\ntrain = train.reset_index(drop=True)\n\n\nprint(\'preprocessing train_data\')\nuse_col = ["srch_co","srch_ci","user_location_region",\\\n "hotel_market","srch_destination_id","hotel_country","srch_adults_cnt","srch_children_cnt","hotel_cluster"]\n\ntrain_y = train[["hotel_cluster"]]\n\ntrain_x = train[use_col]\ntrain_x["srch_ci"] = pd.to_datetime(train_x["srch_ci"], errors="coerce")\ntrain_x["srch_co"] = pd.to_datetime(train_x["srch_co"], errors="coerce")\ntrain_x["period"] = train_x["srch_co"] - train_x["srch_ci"]\ntrain_x["period"] = (train_x["period"] / np.timedelta64(1, \'D\')).astype(int)\ntrain_x = train_x.drop(["srch_co","srch_ci"], axis=1)\ntrain_x["srch_adults_cnt"] = train_x["srch_adults_cnt"].apply(lambda x: 3 if x>=3 else x)\ntrain_x = train_x.drop(["srch_children_cnt"], axis=1)\ntrain_x = train_x[["hotel_market","srch_destination_id","hotel_country","srch_adults_cnt","period","user_location_region"]]\n\n\n\nuse_col = ["srch_co","srch_ci","user_location_region",\\\n "hotel_market","srch_destination_id","hotel_country","srch_adults_cnt","srch_children_cnt"]\nprint("read the test.csv")\ntest = pd.read_csv("../data/test.csv")\ntest = test[use_col]\n\nprint("preprocessing test_data")\n\ntest["srch_ci"] = pd.to_datetime(test["srch_ci"], errors="coerce")\ntest["srch_co"] = pd.to_datetime(test["srch_co"], errors="coerce")\ntest["period"] = test["srch_co"] - test["srch_ci"]\ntest["period"] = (test["period"] / np.timedelta64(1, \'D\')).fillna(0.0).astype(int)\ntest = test.drop(["srch_co","srch_ci"], axis=1)\ntest["num"] = 1\ntest["srch_adults_cnt"] = test["srch_adults_cnt"].apply(lambda x: 3 if x>=3 else x)\ntest = test.drop(["num","srch_children_cnt"], axis=1)\n\ntest = test[["hotel_market","srch_destination_id","hotel_country","srch_adults_cnt","period","user_location_region"]]\n\nprint("modeling strart")\nmodel = RandomForestClassifier(n_estimators=10, max_depth=7, n_jobs=-1, random_state=777)\nprint(\'=\'*50)\nprint(\'# Test shape : {}\'.format(test.shape))\n\nmodel.fit(train_x,train_y)\n\npreds = model.predict_proba(test)\npreds = np.fliplr(np.argsort(preds, axis=1))\n\nprint("save file")\n\nresult_df = pd.DataFrame([ " ".join(row) for row in preds[:,:5].astype(str)], columns=["hotel_cluster"])\nresult_df.index.names = ["id"]\nfile_name = datetime.now().strftime("result_%Y%m%d%H%M%S") + \'.csv\'\nresult_df.to_csv(os.path.join(\'../output\',file_name), index=True)')
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\IPython\core\interactiveshell.pyc in run_cell_magic(self, magic_name, line, cell)
2113 magic_arg_s = self.var_expand(line, stack_depth)
2114 with self.builtin_trap:
-> 2115 result = fn(magic_arg_s, cell)
2116 return result
2117
<decorator-gen-60> in time(self, line, cell, local_ns)
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\IPython\core\magic.pyc in <lambda>(f, *a, **k)
186 # but it's overkill for just that one bit of state.
187 def magic_deco(arg):
--> 188 call = lambda f, *a, **k: f(*a, **k)
189
190 if callable(arg):
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\IPython\core\magics\execution.pyc in time(self, line, cell, local_ns)
1174 if mode=='eval':
1175 st = clock2()
-> 1176 out = eval(code, glob, local_ns)
1177 end = clock2()
1178 else:
<timed eval> in <module>()
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\IPython\core\interactiveshell.pyc in run_cell_magic(self, magic_name, line, cell)
2113 magic_arg_s = self.var_expand(line, stack_depth)
2114 with self.builtin_trap:
-> 2115 result = fn(magic_arg_s, cell)
2116 return result
2117
<decorator-gen-60> in time(self, line, cell, local_ns)
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\IPython\core\magic.pyc in <lambda>(f, *a, **k)
186 # but it's overkill for just that one bit of state.
187 def magic_deco(arg):
--> 188 call = lambda f, *a, **k: f(*a, **k)
189
190 if callable(arg):
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\IPython\core\magics\execution.pyc in time(self, line, cell, local_ns)
1178 else:
1179 st = clock2()
-> 1180 exec(code, glob, local_ns)
1181 end = clock2()
1182 out = None
<timed exec> in <module>()
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\pandas\core\generic.pyc in astype(self, dtype, copy, raise_on_error, **kwargs)
2948
2949 mgr = self._data.astype(dtype=dtype, copy=copy,
-> 2950 raise_on_error=raise_on_error, **kwargs)
2951 return self._constructor(mgr).__finalize__(self)
2952
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\pandas\core\internals.pyc in astype(self, dtype, **kwargs)
2936
2937 def astype(self, dtype, **kwargs):
-> 2938 return self.apply('astype', dtype=dtype, **kwargs)
2939
2940 def convert(self, **kwargs):
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\pandas\core\internals.pyc in apply(self, f, axes, filter, do_integrity_check, consolidate, raw, **kwargs)
2888
2889 kwargs['mgr'] = self
-> 2890 applied = getattr(b, f)(**kwargs)
2891 result_blocks = _extend_blocks(applied, result_blocks)
2892
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\pandas\core\internals.pyc in astype(self, dtype, copy, raise_on_error, values, **kwargs)
432 **kwargs):
433 return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
--> 434 values=values, **kwargs)
435
436 def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\pandas\core\internals.pyc in _astype(self, dtype, copy, raise_on_error, values, klass, mgr, **kwargs)
475
476 # _astype_nansafe works fine with 1-d only
--> 477 values = com._astype_nansafe(values.ravel(), dtype, copy=True)
478 values = values.reshape(self.shape)
479
C:\Users\Byeon\Anaconda3\envs\py27\lib\site-packages\pandas\core\common.pyc in _astype_nansafe(arr, dtype, copy)
1912
1913 if np.isnan(arr).any():
-> 1914 raise ValueError('Cannot convert NA to integer')
1915 elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
1916 # work around NumPy brokenness, #1987
ValueError: Cannot convert NA to integer